library(monocle3)
library(tidySingleCellExperiment)
source('00_util_scripts/mod_seurat.R')

# cao 19 ------------
mobj <- readRDS('~/gene_count_cleaned.RDS')

mobj |> glimpse()

filtered.cells <- mobj |> colnames()

cell.meta <- read_delim('~/cell_annotate.csv.gz')

cell.meta <- cell.meta |>
  filter(sample %in% filtered.cells)

cell.meta |> count(development_stage)

cell.meta |>
  mutate(prefix = str_extract(sample, '.+\\.')) |>
  count(prefix)

sobj <- mobj |>
  CreateSeuratObject(names.delim = '.', min.cells = 3,
                     min.features = 200, meta.data = cell.meta)


necc.meta <- cell.meta |>
  mutate(.cell = sample, embryo_sex, development_stage,
         Main_cell_type, Main_trajectory_refined_by_cluster,
         Sub_trajectory_name,
         .keep = 'none')

sobj <- sobj |>
  left_join(necc.meta)

sobj <- sobj |> NormalizeData()

sobj |> DotPlot()

ttc7a <- sobj |> rownames() |> str_subset('36918')

sobj <- sobj |> mutate(stage.main.traj = str_c(development_stage,
                                          Main_trajectory_refined_by_cluster,
                                          sep = '_'),
                       stage.sub.traj = str_c(development_stage, 
                                              Sub_trajectory_name,
                                              sep = '_'),
                       stage.main.type = str_c(development_stage, 
                                               Main_cell_type,
                                               sep = '_'))

g1 <- sobj |> DotPlot(ttc7a, group.by = 'stage.main.traj')

g1$data |>
  separate_wider_delim(id, delim = '_', names = c('time', 'main.trajectory')) |>
  mutate(time = fct_reorder(time, as.numeric(time))) |>
  ggplot(aes(time, main.trajectory, size = pct.exp, color = avg.exp.scaled)) +
  geom_point() +
  theme_pubr(legend = 'right') +
  scale_color_distiller(palette = 'RdYlBu') +
  labs(title = 'Ttc7a expression in mouse embryo',
       y = 'Main trajectory', size = 'Percent expressed',
       color = 'Average expression')

g2 <- sobj |> DotPlot(ttc7a, group.by = 'stage.sub.traj')

g2$data |>
  separate_wider_delim(id, delim = '_', names = c('time', 'type')) |>
  filter(str_ends(type, 'y')) |>
  mutate(time = fct_reorder(time, as.numeric(time)),
         type = str_remove(type, ' trajectory'),
         type = fct_reorder(type, -avg.exp.scaled)) |>
  ggplot(aes(time, type, size = pct.exp, color = avg.exp.scaled)) +
  geom_point() +
  theme_pubr(legend = 'right', x.text.angle = 60) +
  scale_color_distiller(palette = 'RdYlBu') +
  labs(title = 'Ttc7a expression in mouse embryo',
       y = 'Sub trajectory', size = 'Percent expressed',
       color = 'Average expression') +
  coord_flip()

g3 <- sobj |> DotPlot(ttc7a, group.by = 'stage.main.type')

g3$data |>
  separate_wider_delim(id, delim = '_', names = c('time', 'type')) |>
  mutate(type = fct_reorder(type, -avg.exp.scaled),
         time = fct_reorder(time, as.numeric(time))) |>
  ggplot(aes(time, type, size = pct.exp, color = avg.exp.scaled)) +
  geom_point() +
  theme_pubr(legend = 'right', x.text.angle = 45) +
  scale_color_distiller(palette = 'RdYlBu') +
  labs(title = 'Ttc7a expression in mouse embryo',
       y = 'Main cell type', size = 'Percent expressed',
       color = 'Average expression') +
  coord_flip()

embryo.ttc7 <- sobj |> get_abundance_sc_long(ttc7a)

embryo.ttc7[,-1] |> write_csv('embryo.ttc7.csv')

# chen22 ------------
chen22.ttc7 <- read_tsv('mission/Duan_GSDMx/annotation-timepoint.tsv')

chen22.tidy <- chen22.ttc7 |>
  pivot_longer(-1) |>
  separate_wider_delim(name, delim = regex('_E|:'),
                       names = c('tissue','time','mapping')) |>
  pivot_wider(names_from = mapping, values_from = value)

chen22.tidy |>
  mutate(time = fct_reorder(time, as.numeric(time)),
         mean = scale(mean)[,1],
         tissue = fct_reorder(tissue, mean, .fun = sum)) |>
  ggplot(aes(time, tissue, size = percent_expressed, color = mean)) +
  geom_point() +
  scale_color_distiller(palette = 'RdYlBu') +
  theme_pubr(legend = 'right') +
  labs(title = 'Ttc7a expression in mouse embryo',
       subtitle = 'Chen, et al. Cell. 2022',
       x = 'Time (day)')

chen22.tidy |>
  ggplot(aes(as.numeric(time), mean, color = tissue)) +
  geom_path() +
  geom_point(aes(size = percent_expressed)) +
  scale_color_viridis_d(begin = .1, option = 'turbo')

plotly::ggplotly()

chen22.ttc7.top20 <- chen22.tidy |>
  summarise(sum.expr = sum(mean), .by = tissue) |>
  slice_max(sum.expr, n = 36) |>
  left_join(chen22.tidy)

chen22.ttc7.top20 |>
  mutate(time = fct_reorder(time, as.numeric(time)),
         mean = scale(mean)[,1],
         tissue = fct_reorder(tissue, mean, .fun = sum)) |>
  ggplot(aes(time, tissue, size = percent_expressed, color = mean)) +
  geom_point() +
  scale_color_distiller(palette = 'RdYlBu') +
  theme_pubr(legend = 'right') +
  labs(title = 'Ttc7a expression in mouse embryo',
       subtitle = 'Chen, et al. Cell. 2022',
       x = 'Time (day)', color = 'mean expression')

chen22.ttc7.top20 |>
  ggplot(aes(as.numeric(time), mean, color = tissue)) +
  geom_path() +
  geom_point(aes(size = percent_expressed)) +
  scale_color_manual(values = DiscretePalette(20))
